Abstract

OBJECTIVE :

The purpose of this analysis is to examine the pattern and spread of COVID-19 from January 2020 onwards. A variety of R packages were used for this exercise.

#install.packages("kableExtra")
suppressMessages(library(magrittr)) # pipe operations
suppressMessages(library(lubridate)) # date operations
suppressMessages(library(tidyverse)) # ggplot2, tidyr, dplyr...
suppressMessages(library(gridExtra)) # multiple grid-based plots on a page
suppressMessages(library(ggforce)) # accelerating ggplot2
suppressMessages(library(kableExtra)) # complex tables
suppressMessages(library(leaflet)) #for map
suppressMessages(library(plotly)) #plotly

Data Ingestion :

The data are read from the CSSE COVID-19 repository on GitHub, which is updated every day. It contains data for the whole world.

## global time series of confirmed cases, read directly from GitHub
confirmed <- read.csv(
  file = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
)

## global time series of deaths
death <- read.csv(
  file = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv"
)

## global time series of recovered cases
recovered <- read.csv(
  file = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv"
)

Data Cleaning, Manipulation and Visualisation

Data Cleaning

Verifying the data and converting it into the desired format.

## preview the first 10 rows and the first 10 columns of the confirmed-case table
confirmed[seq_len(10), seq_len(10)]
##                  Province.State      Country.Region      Lat     Long X1.22.20
## 1                                       Afghanistan  33.0000  65.0000        0
## 2                                           Albania  41.1533  20.1683        0
## 3                                           Algeria  28.0339   1.6596        0
## 4                                           Andorra  42.5063   1.5218        0
## 5                                            Angola -11.2027  17.8739        0
## 6                               Antigua and Barbuda  17.0608 -61.7964        0
## 7                                         Argentina -38.4161 -63.6167        0
## 8                                           Armenia  40.0691  45.0382        0
## 9  Australian Capital Territory           Australia -35.4735 149.0124        0
## 10              New South Wales           Australia -33.8688 151.2093        0
##    X1.23.20 X1.24.20 X1.25.20 X1.26.20 X1.27.20
## 1         0        0        0        0        0
## 2         0        0        0        0        0
## 3         0        0        0        0        0
## 4         0        0        0        0        0
## 5         0        0        0        0        0
## 6         0        0        0        0        0
## 7         0        0        0        0        0
## 8         0        0        0        0        0
## 9         0        0        0        0        0
## 10        0        0        0        3        4
## total number of columns in the confirmed-case table
col <- ncol(confirmed)
## get dates from column names: the first four columns are Province.State,
## Country.Region, Lat and Long; every later column is named after its date
## in month.day.year order with a leading "X" (e.g. "X1.22.20").
## Only the leading "X" is stripped. A fixed-width substr(2, 8) would cut
## nine-character names such as "X10.22.20" down to "10.22.2" and the date
## would then be parsed incorrectly.
dates <- names(confirmed)[5:col] %>% sub("^X", "", .) %>% mdy()
## earliest and latest date found in the column names
range(dates)
## [1] "2020-01-22" "2020-03-26"
## [1] "2020-01-22" "2020-03-22"
## earliest and latest dates covered by the data
min_date <- min(dates)
max_date <- max(dates)
## display versions: day of month, abbreviated month name, four-digit year
min_date_formt <- format(min_date, "%d %b %Y")
max_date_formt <- format(max_date, "%d %b %Y")

Data Manipulation

Data cleaning, manipulation and visualisation were performed. On the map, the circles with the larger radii mark the few countries that have the highest numbers of people affected by COVID-19.

## Reshape one wide time-series table into long format, with one row per
## country and date.
##
## data: data frame with the columns Province.State, Country.Region, Lat and
##       Long, plus one count column per date. Each date column is named after
##       its date in month.day.year order with a leading "X" (e.g. "X1.22.20").
##
## Returns a plain data.frame with the columns country, date (a Date) and
## count, where count is the sum over all rows of a country for that date,
## with missing values ignored.
cleanData <- function(data) {
  ## remove some columns
  data %<>%
    select(-c(Province.State, Lat, Long)) %>%
    rename(country = Country.Region)
  ## convert from wide to long format
  data %<>%
    pivot_longer(cols = -country, names_to = "date", values_to = "count")
  ## convert from character to date; strip only the leading "X" so that
  ## nine-character names such as "X10.22.20" are not truncated before parsing
  ## (a fixed substr(2, 8) would turn them into "10.22.2")
  data %<>% mutate(date = mdy(sub("^X", "", date)))
  ## aggregate by country; as.data.frame() returns an ungrouped data.frame
  data %<>%
    group_by(country, date) %>%
    summarise(count = sum(count, na.rm = TRUE)) %>%
    as.data.frame()
  data
}

## long-format series for each measure, with the generic count column renamed
## after the measure it holds
data_confirmed <- rename(cleanData(confirmed), confirmed = count)
data_deaths <- rename(cleanData(death), deaths = count)
data_recovered <- rename(cleanData(recovered), recovered = count)

## combine the 3 datasets into one; merge() joins on the columns the tables
## have in common (country and date)
data <- Reduce(merge, list(data_confirmed, data_deaths, data_recovered))

## distinct country/region names in the merged data, excl. 'Cruise Ship'
countries <- setdiff(pull(data, country), 'Cruise Ship')

## all rows for India: one row per date with confirmed, deaths and recovered
Ind <- filter(data, country == 'India')

## confirmed cases over time
p <- ggplot(Ind, aes(x = date, y = confirmed)) +
  geom_bar(stat = "identity", fill = "#56B4E9")

## deaths over time
p1 <- ggplot(Ind, aes(x = date, y = deaths)) +
  geom_bar(stat = "identity", fill = "#CC0000")

## recoveries over time
p2 <- ggplot(Ind, aes(x = date, y = recovered)) +
  geom_bar(stat = "identity", fill = "#00FF00")

## stack the three charts in three rows of a single plotly figure
subplot(p, p1, p2, nrows = 3, margin = 0.1, titleX = TRUE)

Map

Visualizing the data in the form of a map.

## worldwide totals: add up the counts of all countries for each date
data_world <- summarise(
  group_by(data, date),
  country = 'World',
  confirmed = sum(confirmed),
  deaths = sum(deaths),
  recovered = sum(recovered)
)
## append the 'World' rows to the per-country rows
data <- rbind(data, data_world)
## current confirmed cases: confirmed minus deaths minus recovered
data <- mutate(data, current_confirmed = confirmed - deaths - recovered)
## take the last column of the confirmed-case table as the latest count for
## each location
x <- confirmed
x$confirmed <- x[, ncol(x)]
x %<>%
  select(c(Country.Region, Province.State, Lat, Long, confirmed)) %>%
  ## keep only locations with at least one case: log2(0) is -Inf, which would
  ## give those markers a radius of -Inf
  filter(confirmed > 0) %>%
  ## popup text shown for each marker
  mutate(txt = paste0(Country.Region, ' - ', Province.State, ': ', confirmed))
m <- leaflet(width = 1200, height = 800) %>% addTiles()
# circle marker (units in pixels); the radius grows with the base-2 logarithm
# of the count
m %<>% addCircleMarkers(
  lng = x$Long, lat = x$Lat,
  radius = 2 + log2(x$confirmed), stroke = FALSE,
  color = 'red', fillOpacity = 0.3,
  popup = x$txt
)
# world
m

References :

COVID-19 Data Analysis with R - Worldwide: Yanchang Zhao, http://RDataMining.com

Data was taken from https://github.com/CSSEGISandData

LS0tCnRpdGxlOiAiQ09WSUQtMTkiCmF1dGhvcjogIkt1bWFyaSBTdW5kYXJhbSIKZGF0ZTogImByIGZvcm1hdChTeXMudGltZSgpLCAnJVggJWQgJUIsICVZJylgIgpvdXRwdXQ6IAogIGh0bWxfZG9jdW1lbnQ6CiAgICBmaWdfd2lkdGg6IDYgCiAgICBmaWdfaGVpZ2h0OiA0CiAgICBjb2RlX2ZvbGRpbmc6IGhpZGUKICAgIGNvZGVfZG93bmxvYWQgOiB0cnVlCi0tLQoKYGBge3Igc2V0dXAsIGluY2x1ZGU9RkFMU0V9CmtuaXRyOjpvcHRzX2NodW5rJHNldChlY2hvID0gVFJVRSkKYGBgCgojIHsudGFic2V0IC50YWJzZXQtZmFkZSAudGFic2V0LXBpbGxzfQoKIyMgX19BYnN0cmFjdF9fCgoqKk9CSkVDVElWRSoqIDoKCj5UaGUgcHVycG9zZSBvZiB0aGlzIG1vZGVsIGlzIHRvIGFuYWx5c2UgdGhlIHBhdHRlcm4gYW5kIHNwcmVhZCBvZiB0aGUgQ09WSUQtMTkgZnJvbSBKYW51YXJ5IDIwMjAgb253YXJkcy4gQSB2YXJpZXR5IG9mIHBhY2thZ2VzIHdlcmUgdXNlZCBmb3IgdGhpcyBleGVyY2lzZS4KCmBgYHtyfQojaW5zdGFsbC5wYWNrYWdlcygia2FibGVFeHRyYSIpCnN1cHByZXNzTWVzc2FnZXMobGlicmFyeShtYWdyaXR0cikpICMgcGlwZSBvcGVyYXRpb25zCnN1cHByZXNzTWVzc2FnZXMobGlicmFyeShsdWJyaWRhdGUpKSAjIGRhdGUgb3BlcmF0aW9ucwpzdXBwcmVzc01lc3NhZ2VzKGxpYnJhcnkodGlkeXZlcnNlKSkgIyBnZ3Bsb3QyLCB0aWR5ciwgZHBseXIuLi4Kc3VwcHJlc3NNZXNzYWdlcyhsaWJyYXJ5KGdyaWRFeHRyYSkpICMgbXVsdGlwbGUgZ3JpZC1iYXNlZCBwbG90cyBvbiBhIHBhZ2UKc3VwcHJlc3NNZXNzYWdlcyhsaWJyYXJ5KGdnZm9yY2UpKSAjIGFjY2VsZXJhdGluZyBnZ3Bsb3QyCnN1cHByZXNzTWVzc2FnZXMobGlicmFyeShrYWJsZUV4dHJhKSkgIyBjb21wbGV4IHRhYmxlcwpzdXBwcmVzc01lc3NhZ2VzKGxpYnJhcnkobGVhZmxldCkpICNmb3IgbWFwCnN1cHByZXNzTWVzc2FnZXMobGlicmFyeShwbG90bHkpKSAjcGxvdGx5CmBgYAoKKipEYXRhIEluZ2VzdGlvbioqIDoKCj5SZWFkaW5nIGRhdGEgZnJvbSB0aGUgQ09WSUQtMTkgZm9sZGVyIHdoaWNoIGdldHMgdXBkYXRlZCBldmVyeWRheS4gSXQgY29udGFpbnMgZGF0YSBmb3IgdGhlIHdob2xlIHdvcmxkLgoKYGBge3J9CmNvbmZpcm1lZCA8LSByZWFkLmNzdigiaHR0cHM6Ly9yYXcuZ2l0aHVidXNlcmNvbnRlbnQuY29tL0NTU0VHSVNhbmREYXRhL0NPVklELTE5L21hc3Rlci9jc3NlX2NvdmlkXzE5X2RhdGEvY3NzZV9jb3ZpZF8xOV90aW1lX3Nlcmllcy90aW1lX3Nlcmllc19jb3ZpZDE5X2NvbmZpcm1lZF9nbG9iYWwuY3N2IikKCmRlYXRoIDwtIHJlYWQuY3N2KCJodHRwczovL3Jhdy5naXRodWJ1c2VyY29udGVudC5jb20vQ1NTRUdJU2FuZERhdGEvQ09WSUQtMTkvbWFzdGVyL2Nzc2VfY292aWRfMTlfZGF0YS9jc3NlX2NvdmlkXzE5X3RpbWVfc2VyaWVzL3RpbWVfc2VyaWVzX2NvdmlkMTlfZGVhdGhzX2dsb2JhbC5jc3YiKQoKcmVjb3ZlcmVkIDwtIHJl
YWQuY3N2KCJodHRwczovL3Jhdy5naXRodWJ1c2VyY29udGVudC5jb20vQ1NTRUdJU2FuZERhdGEvQ09WSUQtMTkvbWFzdGVyL2Nzc2VfY292aWRfMTlfZGF0YS9jc3NlX2NvdmlkXzE5X3RpbWVfc2VyaWVzL3RpbWVfc2VyaWVzX2NvdmlkMTlfcmVjb3ZlcmVkX2dsb2JhbC5jc3YiKQoKYGBgCiMgey50YWJzZXQgLnRhYnNldC1mYWRlIC50YWJzZXQtcGlsbHN9CiMjIF9fRGF0YSBDbGVhbmluZyxNYW5pcHVsYXRpb24gYW5kIFZpc3VhbGlzYXRpb25fXyAKIyMjIHsudGFic2V0IC50YWJzZXQtZmFkZX0KIyMjIyBfX0RhdGEgQ2xlYW5pbmdfXyAKVmVyaWZ5aW5nIHRoZSBkYXRhIGFuZCBjaGFuZ2luZyB0aGUgZGF0YSBpbnRvIHRoZSBkZXNpcmFibGUgZm9ybWF0LgpgYGB7cn0KY29uZmlybWVkWzE6MTAsIDE6MTBdCmNvbCA8LSBuY29sKGNvbmZpcm1lZCkKIyMgZ2V0IGRhdGVzIGZyb20gY29sdW1uIG5hbWVzCmRhdGVzIDwtIG5hbWVzKGNvbmZpcm1lZClbNTpjb2xdICU+JSBzdWJzdHIoMiw4KSAlPiUgbWR5KCkKcmFuZ2UoZGF0ZXMpCiMjIFsxXSAiMjAyMC0wMS0yMiIgIjIwMjAtMDMtMjIiCm1pbl9kYXRlIDwtIG1pbihkYXRlcykKbWF4X2RhdGUgPC0gbWF4KGRhdGVzKQptaW5fZGF0ZV9mb3JtdCA8LSBtaW5fZGF0ZSAlPiUgZm9ybWF0KCclZCAlYiAlWScpCm1heF9kYXRlX2Zvcm10IDwtIG1heF9kYXRlICU+JSBmb3JtYXQoJyVkICViICVZJykKYGBgCgoKCiMjIyMgX19EYXRhIE1hbmlwdWxhdGlvbl9fIAoKRGF0YSBDbGVhbmluZyAsIE1hbmlwdWxhdGlvbiBhbmQgVmlzdWFsaXNhdGlvbiB3YXMgcGVyZm9ybWVkLiBXZSBjYW4gc2VlIHRoZSBiaWdnZXIgcmFkaXVzIHNob3dzIGEgZmV3IGNvdW50cmllZCB3aGljaCBoYXZlIHRoZSBoaWdoZXN0IG51bWJlciBvZiBwZW9wbGUgYWZmZWN0ZWQgYnkgQ09WSUQtMTkuCmBgYHtyLCB3YXJuaW5nPUZBTFNFfQpjbGVhbkRhdGEgPC0gZnVuY3Rpb24oZGF0YSkgewojIyByZW1vdmUgc29tZSBjb2x1bW5zCmRhdGEgJTw+JSBzZWxlY3QoLWMoUHJvdmluY2UuU3RhdGUsIExhdCwgTG9uZykpICU+JSByZW5hbWUoY291bnRyeT1Db3VudHJ5LlJlZ2lvbikKIyMgY29udmVydCBmcm9tIHdpZGUgdG8gbG9uZyBmb3JtYXQKZGF0YSAlPD4lIGdhdGhlcihrZXk9ZGF0ZSwgdmFsdWU9Y291bnQsIC1jb3VudHJ5KQojIyBjb252ZXJ0IGZyb20gY2hhcmFjdGVyIHRvIGRhdGUKZGF0YSAlPD4lIG11dGF0ZShkYXRlID0gZGF0ZSAlPiUgc3Vic3RyKDIsOCkgJT4lIG1keSgpKQojIyBhZ2dyZWdhdGUgYnkgY291bnRyeQpkYXRhICU8PiUgZ3JvdXBfYnkoY291bnRyeSwgZGF0ZSkgJT4lIHN1bW1hcmlzZShjb3VudD1zdW0oY291bnQsIG5hLnJtPVQpKSAlPiUgYXMuZGF0YS5mcmFtZSgpCnJldHVybihkYXRhKQp9CgpkYXRhX2NvbmZpcm1lZCA8LSBjb25maXJtZWQgJT4lIGNsZWFuRGF0YSgpICU+JSByZW5hbWUoY29uZmlybWVkPWNvdW50KQpkYXRhX2RlYXRocyA8LSBkZWF0aCAlPiUgY2xlYW5EYXRhKCkg
JT4lIHJlbmFtZShkZWF0aHM9Y291bnQpCmRhdGFfcmVjb3ZlcmVkIDwtIHJlY292ZXJlZCAlPiUgY2xlYW5EYXRhKCkgJT4lIHJlbmFtZShyZWNvdmVyZWQ9Y291bnQpCgojIyBtZXJnZSBhYm92ZSAzIGRhdGFzZXRzIGludG8gb25lLCBieSBjb3VudHJ5IGFuZCBkYXRlCmRhdGEgPC0gZGF0YV9jb25maXJtZWQgJT4lIG1lcmdlKGRhdGFfZGVhdGhzKSAlPiUgbWVyZ2UoZGF0YV9yZWNvdmVyZWQpCgojIyBjb3VudHJpZXMvcmVnaW9ucyB3aXRoIGNvbmZpcm1lZCBjYXNlcywgZXhjbC4gY3J1aXNlIHNoaXBzCmNvdW50cmllcyA8LSBkYXRhICU+JSBwdWxsKGNvdW50cnkpICU+JSBzZXRkaWZmKCdDcnVpc2UgU2hpcCcpCgojIyBmaXJzdCAxMCByZWNvcmRzIHdoZW4gaXQgZmlyc3QgYnJva2Ugb3V0IGluIEluZGlhCkluZCA8LSBkYXRhICU+JSBmaWx0ZXIoY291bnRyeT09J0luZGlhJykKcCA8LWdncGxvdChkYXRhPSBJbmQsIG1hcHBpbmcgPSBhZXMoeD0gZGF0ZSwgeT0gY29uZmlybWVkKSkgKyBnZW9tX2JhcihzdGF0PSAiaWRlbnRpdHkiLCBmaWxsID0gIiM1NkI0RTkiKQojZ2dwbG90bHkocCkKCnAxIDwtIGdncGxvdChkYXRhPSBJbmQsIG1hcHBpbmcgPSBhZXMoeD0gZGF0ZSwgeT0gZGVhdGhzKSkgKyBnZW9tX2JhcihzdGF0PSAiaWRlbnRpdHkiLCBmaWxsID0gIiNDQzAwMDAiKQojZ2dwbG90bHkocDEpCgpwMiA8LSBnZ3Bsb3QoZGF0YT0gSW5kLCBtYXBwaW5nID0gYWVzKHg9IGRhdGUsIHk9IHJlY292ZXJlZCkpICsgZ2VvbV9iYXIoc3RhdD0gImlkZW50aXR5IiwgZmlsbCA9ICIjMDBGRjAwIikKI2dncGxvdGx5KHAyKQpzdWJwbG90KHAsIHAxLCBwMiwgbWFyZ2luID0gMC4xLCBucm93cyA9IDMsIHRpdGxlWCA9IFRSVUUpCmBgYAoKCiMjIyMgX19NYXBfXyAKVmlzdWFsaXppbmcgZGF0YSBpbiB0aGUgZm9ybSBvZiBNYXAKYGBge3IsIHdhcm5pbmc9RkFMU0V9CiMjIGNvdW50cyBmb3IgdGhlIHdob2xlIHdvcmxkCmRhdGFfd29ybGQgPC0gZGF0YSAlPiUgZ3JvdXBfYnkoZGF0ZSkgJT4lCnN1bW1hcmlzZShjb3VudHJ5PSdXb3JsZCcsCmNvbmZpcm1lZCA9IHN1bShjb25maXJtZWQpLApkZWF0aHMgPSBzdW0oZGVhdGhzKSwKcmVjb3ZlcmVkID0gc3VtKHJlY292ZXJlZCkpCmRhdGEgJTw+JSByYmluZChkYXRhX3dvcmxkKQojIyBjdXJyZW50IGNvbmZpcm1lZCBjYXNlcwpkYXRhICU8PiUgbXV0YXRlKGN1cnJlbnRfY29uZmlybWVkID0gY29uZmlybWVkIC0gZGVhdGhzIC0gcmVjb3ZlcmVkKQpgYGAKYGBge3Isd2FybmluZz1GQUxTRX0KIyMgc2VsZWN0IGxhc3QgY29sdW1uLCB3aGljaCBpcyB0aGUgbnVtYmVyIG9mIGxhdGVzdCBjb25maXJtZWQgY2FzZXMKeCA8LSBjb25maXJtZWQKeCRjb25maXJtZWQgPC0geFssIG5jb2woeCldCnggJTw+JSBzZWxlY3QoYyhDb3VudHJ5LlJlZ2lvbiwgUHJvdmluY2UuU3RhdGUsIExhdCwgTG9uZywgY29uZmlybWVkKSkgJT4lCm11dGF0ZSh0eHQ9cGFzdGUwKENvdW50cnkuUmVnaW9uLCAnIC0g
JywgUHJvdmluY2UuU3RhdGUsICc6ICcsIGNvbmZpcm1lZCkpCm0gPC0gbGVhZmxldCh3aWR0aD0xMjAwLCBoZWlnaHQ9ODAwKSAlPiUgYWRkVGlsZXMoKQojIGNpcmNsZSBtYXJrZXIgKHVuaXRzIGluIHBpeGVscykKbSAlPD4lIGFkZENpcmNsZU1hcmtlcnMoeCRMb25nLCB4JExhdCwKcmFkaXVzPTIrbG9nMih4JGNvbmZpcm1lZCksIHN0cm9rZT1GLApjb2xvcj0ncmVkJywgZmlsbE9wYWNpdHk9MC4zLApwb3B1cD14JHR4dCkKIyB3b3JsZAptCmBgYAoqKlJlZmVyZW5jZXMqKiA6Cgo+Q09WSUQtMTkgRGF0YSBBbmFseXNpcyB3aXRoIFIgLSBXb3JsZHdpZGUgOiBZYW5jaGFuZyBaaGFvLHlhbmNoYW5nQFJEYXRhTWluaW5nLmNvbSxodHRwOi8vIFJEYXRhTWluaW5nLmNvbQoKPkRhdGEgd2FzIHRha2VuIGZyb20gaHR0cHM6Ly9naXRodWIuY29tL0NTU0VHSVNhbmREYXRhCg==